# Plotting and data-handling libraries used throughout the notebook.
import plotly.graph_objects as go
import plotly.figure_factory as ff
import plotly
import plotly.io as pio
import matplotlib.pyplot as plt
import matplotlib as mpl
# A value of 0 disables matplotlib's "too many open figures" warning; the
# per-state loop further down opens one figure per state without closing it.
mpl.rcParams.update({'figure.max_open_warning': 0})
# IPython magic (notebook only): render matplotlib figures inline in the cell output.
%matplotlib inline
import pandas as pd
import numpy as np
# Never truncate the column list when a DataFrame is displayed.
pd.set_option('display.max_columns', None)
# Bare expression: displays the default and available Plotly templates.
pio.templates
Templates configuration
-----------------------
Default template: 'plotly'
Available templates:
['ggplot2', 'seaborn', 'simple_white', 'plotly',
'plotly_white', 'plotly_dark', 'presentation', 'xgridoff',
'ygridoff', 'gridon', 'none']
# Load the case dataset, using the first CSV column as the DataFrame index.
# NOTE: `squeeze=True` was removed from this call. The keyword was deprecated
# in pandas 1.4 and dropped in pandas 2.0 (where it raises TypeError), and it
# never had an effect here because the file has more than one data column.
# NOTE: `parse_dates=True` only attempts to parse the index column; the
# 'date' column is still loaded as plain strings.
filename = 'caso_full.csv'
# For quick experiments, add `nrows=100000` to read only a sample of the file.
df = pd.read_csv(filename, header=0, index_col=0, parse_dates=True)
# Keep only the rows whose place_type is 'state'.
df = df[df['place_type'] == 'state']
df
| city_ibge_code | date | epidemiological_week | estimated_population | estimated_population_2019 | is_last | is_repeated | last_available_confirmed | last_available_confirmed_per_100k_inhabitants | last_available_date | last_available_death_rate | last_available_deaths | order_for_place | place_type | state | new_confirmed | new_deaths | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| city | |||||||||||||||||
| NaN | 12.0 | 2020-03-17 | 202012 | 894470.0 | 881935.0 | False | False | 3 | 0.33539 | 2020-03-17 | 0.0000 | 0 | 1 | state | AC | 3 | 0 |
| NaN | 12.0 | 2020-03-18 | 202012 | 894470.0 | 881935.0 | False | False | 3 | 0.33539 | 2020-03-18 | 0.0000 | 0 | 2 | state | AC | 0 | 0 |
| NaN | 12.0 | 2020-03-19 | 202012 | 894470.0 | 881935.0 | False | False | 4 | 0.44719 | 2020-03-19 | 0.0000 | 0 | 3 | state | AC | 1 | 0 |
| NaN | 12.0 | 2020-03-20 | 202012 | 894470.0 | 881935.0 | False | False | 7 | 0.78259 | 2020-03-20 | 0.0000 | 0 | 4 | state | AC | 3 | 0 |
| NaN | 12.0 | 2020-03-21 | 202012 | 894470.0 | 881935.0 | False | False | 11 | 1.22978 | 2020-03-21 | 0.0000 | 0 | 5 | state | AC | 4 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| NaN | 17.0 | 2021-08-07 | 202131 | 1590248.0 | 1572866.0 | False | False | 212734 | 13377.41032 | 2021-08-07 | 0.0168 | 3564 | 508 | state | TO | 159 | 4 |
| NaN | 17.0 | 2021-08-08 | 202132 | 1590248.0 | 1572866.0 | False | False | 212851 | 13384.76766 | 2021-08-08 | 0.0168 | 3569 | 509 | state | TO | 117 | 5 |
| NaN | 17.0 | 2021-08-09 | 202132 | 1590248.0 | 1572866.0 | False | False | 213262 | 13410.61268 | 2021-08-09 | 0.0168 | 3576 | 510 | state | TO | 411 | 7 |
| NaN | 17.0 | 2021-08-10 | 202132 | 1590248.0 | 1572866.0 | True | False | 213478 | 13424.19547 | 2021-08-10 | 0.0168 | 3585 | 511 | state | TO | 216 | 9 |
| NaN | 17.0 | 2021-08-11 | 202132 | 1590248.0 | 1572866.0 | False | True | 213478 | 13424.19547 | 2021-08-10 | 0.0168 | 3585 | 512 | state | TO | 0 | 0 |
13963 rows × 17 columns
# Print the column dtypes, non-null counts and memory usage of the filtered frame.
df.info()
<class 'pandas.core.frame.DataFrame'> Index: 13963 entries, nan to nan Data columns (total 17 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 city_ibge_code 13963 non-null float64 1 date 13963 non-null object 2 epidemiological_week 13963 non-null int64 3 estimated_population 13963 non-null float64 4 estimated_population_2019 13963 non-null float64 5 is_last 13963 non-null bool 6 is_repeated 13963 non-null bool 7 last_available_confirmed 13963 non-null int64 8 last_available_confirmed_per_100k_inhabitants 13957 non-null float64 9 last_available_date 13963 non-null object 10 last_available_death_rate 13963 non-null float64 11 last_available_deaths 13963 non-null int64 12 order_for_place 13963 non-null int64 13 place_type 13963 non-null object 14 state 13963 non-null object 15 new_confirmed 13963 non-null int64 16 new_deaths 13963 non-null int64 dtypes: bool(2), float64(5), int64(6), object(4) memory usage: 1.7+ MB
# Display summary statistics (count, mean, std, quartiles, min/max) per numeric column.
df.describe()
| city_ibge_code | epidemiological_week | estimated_population | estimated_population_2019 | last_available_confirmed | last_available_confirmed_per_100k_inhabitants | last_available_death_rate | last_available_deaths | order_for_place | new_confirmed | new_deaths | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 13963.000000 | 13963.000000 | 1.396300e+04 | 1.396300e+04 | 1.396300e+04 | 13957.000000 | 13963.000000 | 13963.000000 | 13963.000000 | 13963.000000 | 13963.000000 |
| mean | 29.172456 | 202068.212991 | 7.917636e+06 | 7.857543e+06 | 2.891813e+05 | 4541.170490 | 0.027477 | 7973.330874 | 259.107713 | 1450.648213 | 40.550956 |
| std | 12.759476 | 42.966500 | 9.242119e+06 | 9.176868e+06 | 4.518302e+05 | 4003.034959 | 0.017022 | 14491.827926 | 149.351443 | 2466.301158 | 84.406521 |
| min | 11.000000 | 202009.000000 | 6.311810e+05 | 6.057610e+05 | 0.000000e+00 | 0.002160 | 0.000000 | 0.000000 | 1.000000 | -2845.000000 | -84.000000 |
| 25% | 17.000000 | 202030.000000 | 2.809394e+06 | 2.778986e+06 | 4.782950e+04 | 1226.610930 | 0.019400 | 943.000000 | 130.000000 | 239.000000 | 5.000000 |
| 50% | 27.000000 | 202048.000000 | 4.064052e+06 | 4.018650e+06 | 1.506680e+05 | 3559.747640 | 0.023600 | 3539.000000 | 259.000000 | 701.000000 | 15.000000 |
| 75% | 41.000000 | 202114.000000 | 9.616621e+06 | 9.557071e+06 | 3.367055e+05 | 7046.182350 | 0.030100 | 8766.500000 | 388.000000 | 1602.500000 | 39.000000 |
| max | 53.000000 | 202132.000000 | 4.628933e+07 | 4.591905e+07 | 4.138421e+06 | 19220.318740 | 0.222200 | 141664.000000 | 534.000000 | 64036.000000 | 1389.000000 |
# Distinct state codes, in order of first appearance in the data; the
# per-state plotting loops iterate over this array.
estados = df['state'].unique()
estados
array(['AC', 'AL', 'AM', 'AP', 'BA', 'CE', 'DF', 'ES', 'GO', 'MA', 'MG',
'MS', 'MT', 'PA', 'PB', 'PE', 'PI', 'PR', 'RJ', 'RN', 'RO', 'RR',
'RS', 'SC', 'SE', 'SP', 'TO'], dtype=object)
# Total deaths per state: sum the daily 'new_deaths' values within each state.
totalmortes = df['new_deaths'].groupby(df['state']).sum()

# One vertical bar per state, with the total used as the bar label.
barras = go.Bar(
    x=totalmortes.index,
    y=totalmortes,
    orientation='v',
    text=totalmortes,
    textposition='auto',
)
fig = go.Figure(data=[barras])

# Labels are drawn outside the bars using the '.2s' number format.
fig.update_traces(textposition='outside', texttemplate='%{text:.2s}')
fig.update_layout(
    title_text='Total de Mortes por Estado',
    template='ggplot2',
    uniformtext_minsize=8,
    uniformtext_mode='hide',
)
# The bar labels already carry the values, so the y axis is hidden.
fig.update_yaxes(showticklabels=False, visible=False)
fig.show()
# Violin plot of daily new deaths, one violin per state, each with its
# embedded box plot and mean line switched on.
estado = df['state'].unique()
fig = go.Figure()
for state in estado:
    # Select this state's rows once and reuse them for both axes.
    linhas_uf = df[df['state'] == state]
    violino = go.Violin(
        name=state,
        x=linhas_uf['state'],
        y=linhas_uf['new_deaths'],
        meanline_visible=True,
        box_visible=True,
    )
    fig.add_trace(violino)
fig.update_layout(title_text='Análise de Mortes por Estado', template='ggplot2')
fig.show()
# Plot the 7-day moving average of daily deaths, one matplotlib figure per state.
for uf in estados:
    # Daily death totals for this state, indexed by the 'date' column.
    mortes_diarias = df[df['state'] == uf].groupby('date')['new_deaths'].sum()
    media_movel = mortes_diarias.rolling(window=7).mean()

    # A new wide, short figure becomes the current one; the title and the
    # pandas plot below are both drawn on it.
    plt.figure(figsize=(15, 3))
    plt.title('Média Móvel: ' + uf)
    media_movel.plot()
# For every state, draw the daily deaths as bars with the 7-day moving
# average overlaid as a line, one Plotly figure per state.
for uf in estados:
    df2 = df[df['state'] == uf]
    series2 = df2.groupby('date')['new_deaths'].sum()
    # Negative daily counts are floored at zero before plotting and averaging.
    series2 = series2.clip(lower=0)
    # The rolling mean is NaN until the 7-day window is full.
    rolling_mean2 = series2.rolling(window=7).mean()

    fig = go.Figure()
    fig.add_trace(go.Bar(
        x=series2.index,
        y=series2,
        name='Mortes',
        orientation='v',
        marker=dict(
            color='rgba(169,169,169, 0.6)',
            line=dict(color='rgba(169,169,169, 1.0)', width=3),
        ),
    ))
    # go.Line is a deprecated alias that emits a DeprecationWarning; use an
    # explicit Scatter trace in line mode instead. The colour and width are
    # set on `line` (the trace line itself) rather than on `marker.line`,
    # which only styles marker outlines.
    fig.add_trace(go.Scatter(
        x=rolling_mean2.index,
        y=rolling_mean2,
        name='Média Movel',
        mode='lines',
        line=dict(color='rgba(165,42,42, 1)', width=4),
    ))
    fig.update_layout(
        barmode='stack',
        title_text=uf + ': Total de Mortes e Média Móvel',
        template='ggplot2',
        # Show a single hover box listing every trace's value at the hovered x position.
        hovermode='x unified',
        hoverlabel=dict(bgcolor='white', font_size=16, font_family='Rockwell'),
    )
    fig.show()
C:\Users\Liu\anaconda3\lib\site-packages\plotly\graph_objs\_deprecations.py:378: DeprecationWarning: plotly.graph_objs.Line is deprecated. Please replace it with one of the following more specific types - plotly.graph_objs.scatter.Line - plotly.graph_objs.layout.shape.Line - etc.